SenseMyFEUP data

Data loading

Data is filtered by travelmode (car and bus) and date (April 2016).

Intersession times

Visualizing intersession times

All intersession time

Intersession time along the week

We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`

Histogram intersession time.

We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`

Number of sessions per way_id

We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`

Filtering by sessions per way_id

ECDF

ECDF by #sessions

Lowest 35%

We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
ggplotly(c1) 
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Removed 248 rows containing non-finite values (stat_bin).

Top 10% points

Map points April 2016

Showing maps

Traffic Map April Porto 2016 all

Traffic Map April 2016 all >50

Traffic day Map April 2016

Traffic night Map April 2016

Top 12 edges

Map Intersession time all 1.30h.

Map Intersession time (<1.3h) day.

Map Intersession time (<2h)night.

By date >50

Speeds >50

# Speed by way_id and session.\
avg_speed_wayid <- df_speed %>% 
  subset(way_id %in% df_hotedges_april16pt$way_id) %>% 
  group_by(way_id) %>% 
  summarise(avg_speed = mean((speed*18)/5), n = n() )

ggplot(avg_speed_wayid, aes(avg_speed)) +
  geom_histogram(binwidth = 5) + 
  scale_x_continuous(name = "Avg_speed(km/h)",  breaks = seq(0, 150, 10) )

Speed by week

Speed by day

Speed by hour

Number of session per hour

Ways_ids per half hour

Time serie per way_id

df_superhotedges_april16pt %>% 
  filter( way_id == list_osm_edge[2] ) %>% 
  plot_ly(x= ~time, y= ~(speed*18)/5, color = ~class) %>% 
  layout(title = paste("Time series of speed for way_id",list_osm_edge[2], sep = " " ),yaxis = list(title="Speed (km/h)"),
         xaxis = list(title="Date"))
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
minimal value for n is 3, returning requested palette with 3 different levels
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
minimal value for n is 3, returning requested palette with 3 different levels
---
title: 'Sense My FEUP - April 2016 Data'
author: "Daniela S. Gil"
output: html_notebook
---

```{r echo = FALSE, eval=FALSE}
# Manual workspace checkpoint; the chunk header sets eval=FALSE, so knitting
# never runs it. Run ONE of the lines by hand: save.image() writes the current
# session to the file, load() restores a session from it. Running the chunk top
# to bottom overwrites the file with the current session before reading it back.
save.image("SensemyWorkSpace.RData")
#load("df_first_session_cars_april16.Rda")
load("SensemyWorkSpace.RData")

```


# SenseMyFEUP data
## Data loading
 Data is filtered by travelmode (car and bus) and date (April 2016).      

```{r echo=FALSE}
# Keep an unfiltered copy of the speed records.
df_edges_april16 <- df_speed

# Restrict to the Porto bounding box. Both bounds are strict, so records lying
# exactly on the border are dropped.
df_edges_april16pt <- filter(
  df_speed,
  lat > 41.1364726546 & lat < 41.1859352808155,
  lon > -8.6912940681405 & lon < -8.55396934228
)
```

## Intersession times

### Visualizing intersession times
```{r echo=FALSE, eval=FALSE}

# One row per (way_id, session_id): the first second at which the session was
# observed on that way.
df_intersession_april16pt <- summarise(
  group_by(df_edges_april16pt, way_id, session_id),
  seconds = min(seconds)
)

# Epoch seconds -> POSIXct, for the raw records and for the per-session table.
df_edges_april16pt$time <- as.POSIXct(
  df_edges_april16pt$seconds,
  origin = "1970-01-01"
)
df_intersession_april16pt$time <- as.POSIXct(
  df_intersession_april16pt$seconds,
  origin = "1970-01-01"
)

# Gap, in minutes, between consecutive sessions; the first row gets 0.
# NOTE(review): presumably the table is still grouped by way_id here
# (summarise() peels off one grouping level), so the gaps are per way - confirm
# with the dplyr version in use.
df_intersession_april16pt <- mutate(
  arrange(df_intersession_april16pt, desc(way_id), time),
  intersession_time = c(0, as.numeric(diff(time), units = "mins"))
)

# Remove Min column
# df_intersession_april$min <- NULL 
```

All intersession times
```{r echo=FALSE}
# Two box plots of intersession time: every row, and only rows with a positive
# gap (the first session on a way is stored as 0). The y axis is cut at 5000.
plot_ly(
  y = df_intersession_april16pt$intersession_time,
  type = "box",
  name = "All way_ids"
) %>%
  add_boxplot(
    y = filter(df_intersession_april16pt, intersession_time > 0)$intersession_time,
    name = "At least 2 sessions"
  ) %>%
  layout(
    title = "Intersession time (mins)",
    yaxis = list(range = c(0, 5000))
  )

```

Intersession time along the week 
```{r echo=FALSE}
# Box plot of intersession time per weekday, one fill colour per day.
# NOTE(review): the x limits are Portuguese day names while weekdays() returns
# names in the session locale - confirm it yields exactly these labels.
p <- ggplot(
  df_intersession_april16pt,
  aes(
    x = weekdays(time),
    y = intersession_time,
    fill = interaction(weekdays(time))
  )
) +
  geom_boxplot() +
  scale_x_discrete(
    limits = c("Domingo", "Segunda", "Terça", "Quarta", "Quinta", "Sexta", "Sábado")
  ) +
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    x = "Days of the week",
    y = "Intersession time (mins)",
    title = "Intersession time(min) in the week "
  ) +
  theme(legend.position = "none")

# Convert to a plotly object and display it.
p <- plotly_build(p)
p

```

Histogram intersession time.
```{r echo=FALSE}

# Histograms of the positive intersession times, converted from minutes to
# hours: an overview (9 h bins, view limited to 0-300 h) stacked above a zoom
# (1 h bins, view limited to 0-25 h).
int2 <- ggplotly(
  ggplot(
    filter(df_intersession_april16pt, intersession_time > 0),
    aes(intersession_time / 60)
  ) +
    geom_histogram(binwidth = 9) +
    coord_cartesian(xlim = c(0, 300)) +
    ggtitle("Intersession time (hours)")
)

int3 <- ggplotly(
  ggplot(
    filter(df_intersession_april16pt, intersession_time > 0),
    aes(intersession_time / 60)
  ) +
    geom_histogram(binwidth = 1) +
    coord_cartesian(xlim = c(0, 25))
)

subplot(int2, int3, nrows = 2)
```

Number of sessions per way_id

```{r echo=FALSE,  message=FALSE}
# Distribution of the number of sessions per way_id (each row of
# df_intersession_april16pt is one way_id/session pair), with a dashed
# reference line at 50 sessions. The view is limited to 0-150 sessions.
int1 <- ggplot(
  summarise(group_by(df_intersession_april16pt, way_id), n_sessions = n()),
  aes(x = n_sessions)
) +
  geom_histogram(binwidth = 10) +
  geom_vline(
    xintercept = 50,
    size = 1,
    colour = "#FF3721",
    linetype = "dashed"
  ) +
  ggtitle("Number of sessions per way_id in Porto") +
  coord_cartesian(xlim = c(0, 150))

ggplotly(int1)
```

### Filtering by sessions per way_id 

```{r echo=FALSE}
# way_ids with at least 200 sessions.
# NOTE(review): headings and plot titles in this notebook still say ">50".
df_id_hotedges_april16pt <- filter(
  summarise(group_by(df_intersession_april16pt, way_id), n_sessions = n()),
  n_sessions >= 200
)

# Raw records of those ways, with n_sessions attached.
df_hotedges_april16pt <- merge(
  x = df_id_hotedges_april16pt,
  y = df_edges_april16pt,
  by = "way_id"
)

# NOTE(review): built from the same inputs as df_hotedges_april16pt, so the two
# tables are currently identical.
df_superhotedges_april16pt <- merge(
  x = df_id_hotedges_april16pt,
  y = df_edges_april16pt,
  by = "way_id"
)

```

```{r echo=FALSE, eval=FALSE} 
# Tables joined with df_points_edge_osm_april16pt, used as input for the maps.

# way_ids with at least 200 sessions.
df_points_hotedges_april16pt <- merge(
  x = filter(
    summarise(group_by(df_intersession_april16pt, way_id), n_sessions = n()),
    n_sessions >= 200
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

# NOTE(review): same threshold as above, so this table is currently identical
# to df_points_hotedges_april16pt.
df_points_superhotedges_april16pt <- merge(
  x = filter(
    summarise(group_by(df_intersession_april16pt, way_id), n_sessions = n()),
    n_sessions >= 200
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

# Tag every raw record as "day" (hours 7 to 20 inclusive) or "night".
df_superhotedges_april16pt <- df_superhotedges_april16pt %>%
  mutate(class = ifelse(hour(time) >= 7 & hour(time) <= 20, "day", "night"))

# Day: records in hours 7-20 on ways with at least 50 sessions.
df_points_hotedges_april16pt_d <- merge(
  x = filter(
    df_hotedges_april16pt,
    hour(time) >= 7 & hour(time) <= 20 & n_sessions >= 50
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

# Night: records outside hours 7-20 on ways with at least 20 sessions.
df_points_april16pt_night_n <- merge(
  x = filter(
    df_hotedges_april16pt,
    (hour(time) < 7 | hour(time) > 20) & n_sessions >= 20
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

```

```{r echo=FALSE}
# Label each session start as "day" (hours 7 to 20 inclusive) or "night".
df_intersession_april16pt <- df_intersession_april16pt %>%
  mutate(class = ifelse(hour(time) >= 7 & hour(time) <= 20, "day", "night"))
  
```


```{r echo=FALSE, eval=FALSE}
# Inputs for the map chunk: a (way_id, points) table and the way_ids to draw.

# NOTE(review): this first assignment is overwritten by the next statement;
# presumably one of the two is meant to be run by hand, depending on the map.
df_osm_edge <- select(df_points_hotedges_april16pt, way_id, points)

df_osm_edge <- select(df_points_superhotedges_april16pt, way_id, points)

# First column (way_id) of the table, iterated over by the map loop.
list_osm_edge <- df_osm_edge[, 1]

```

### ECDF 
```{r echo=FALSE}
# ECDF of the positive intersession times, on a linear axis (e1) and on a
# log10 axis (e2), shown side by side.
e1 <- ggplot(
  subset(df_intersession_april16pt, intersession_time > 0),
  aes(intersession_time)
) +
  stat_ecdf(geom = "step") +
  xlab("Intersession time(mins)")

e2 <- ggplot(
  subset(df_intersession_april16pt, intersession_time > 0),
  aes(intersession_time)
) +
  stat_ecdf(geom = "step") +
  scale_x_log10() +
  xlab("Intersession time Log")

grid.arrange(
  e1, e2,
  ncol = 2,
  top = "Intersession time all sessions Porto (mins)"
)
```
 
```{r echo=FALSE, eval=FALSE}
# ECDF of avg_itm on a log10 axis, day table (e3) next to night table (e4).
# NOTE(review): df_hotedges_april16pt_day / _night are not created anywhere in
# this notebook's visible chunks - they must already exist in the workspace.
e3 <- ggplot(df_hotedges_april16pt_day, aes(avg_itm)) +
  stat_ecdf(geom = "step") +
  scale_x_log10(breaks = seq(0, 1000, 200)) +
  xlab(" Log Intersession time(min)")

e4 <- ggplot(df_hotedges_april16pt_night, aes(x = avg_itm)) +
  stat_ecdf(geom = "step") +
  scale_x_log10(breaks = seq(0, 1000, 200)) +
  xlab(" Log Intersession time(min)")

grid.arrange(
  e3, e4,
  ncol = 2,
  top = "ECDF Average Intersession time day(>50) "
)
```

```{r echo=FALSE, eval=FALSE}
# Tag each session start as "day" (hours 7 to 20 inclusive) or "night".
df_intersession_april16pt <- df_intersession_april16pt %>%
  mutate(class = ifelse(hour(time) >= 7 & hour(time) <= 20, "day", "night"))

# ECDF (log10 axis) of the positive intersession times on the hot ways, one
# curve per class.
ggplot(
  subset(
    filter(df_intersession_april16pt, intersession_time > 0),
    way_id %in% df_hotedges_april16pt$way_id
  ),
  aes(intersession_time, color = class)
) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  ggtitle("Intersession time >50 sessions Porto")
```


```{r  echo = FALSE, eval = FALSE }
# ECDF all by #points
# Three ECDFs of `points` on a log10 axis, arranged in one row.
# NOTE(review): all three panels read the same `df_osm_edge`, so they show the
# same curve unless `df_osm_edge` is rebuilt between them - confirm which
# tables the day and night panels should use.
ecdf_all <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  xlab("All points")

# ggtitle() is the ggplot2 way to set a title; base-graphics title() cannot be
# added to a ggplot object.
ecdf_day <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  ggtitle("ECDF day") +
  xlab("Points at day")

ecdf_night <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  ggtitle("ECDF night") +
  xlab("Points at night")

grid.arrange(ecdf_all, ecdf_day, ecdf_night, ncol = 3)

```


ECDF by #sessions 

```{r echo=FALSE}
# ECDF (log10 axis) of the number of sessions per way_id.
ggplot(
  summarise(group_by(df_intersession_april16pt, way_id), sessions = n()),
  aes(sessions)
) +
  stat_ecdf(geom = "step") +
  scale_x_log10() +
  ggtitle("ECDF number of sessions all Porto")
  
```

### Lowest 35% 


```{r echo=FALSE, eval=TRUE}
# Histogram of the intersession times on the hot ways that fall below the 35%
# quantile. The quantile is taken over ALL rows of df_intersession_april16pt
# (including the 0 entries), not only over the hot ways.
low1 <- ggplot(
  subset(
    df_intersession_april16pt,
    way_id %in% df_hotedges_april16pt$way_id &
      intersession_time < quantile(df_intersession_april16pt$intersession_time, 0.35)
  ),
  aes(intersession_time)
) +
  geom_histogram(bins = 10) +
  scale_x_continuous(breaks = seq(0, 200, 10)) +
  labs(
    x = "Intersession time (mins)",
    title = "Lowest 35% intersession time >50"
  )

ggplotly(low1)
```


```{r}
# Stacked histogram of intersession time on the hot ways, filled by day/night.
# No bin count is given, so stat_bin() falls back to its default and says so.
# xlim()/ylim() set scale limits: rows outside 0-3000 min are removed before
# binning (reported as "non-finite values" in the rendered output).
c1 <- ggplot(
  subset(df_intersession_april16pt, way_id %in% df_hotedges_april16pt$way_id),
  aes(intersession_time, fill = class)
) +
  geom_histogram() +
  xlim(c(0, 3000)) +
  ylim(c(0, 5000)) +
  labs(
    x = "Intersession time (min)",
    title = "Intersession time >50 day/night"
  )

ggplotly(c1)
```



Top 10% points
```{r echo = FALSE, eval=TRUE}
# Five-number summary, 90% quantile and box plot of `points` per hot way.
summary(df_points_hotedges_april16pt$points)
quantile(df_points_hotedges_april16pt$points, probs = 0.90)

boxplot(df_points_hotedges_april16pt$points)


```


## Map points April 2016

```{r echo=FALSE, eval=FALSE}
# Build the leaflet map `m` of Porto: one polyline per way_id in
# `list_osm_edge`, coloured by the band in which the way's `points` value
# (column 2 of `df_osm_edge`) falls.

#Points
#boxplot(df_osm_edge$points)

# Colour thresholds: quantiles of `points`; `low` is the floor of the lowest band.
feup <- quantile(df_osm_edge$points, 0.9)
superior <- quantile(df_osm_edge$points, 0.7)
medio <- quantile(df_osm_edge$points, 0.5)
low <- 0

# Empty map centred on Porto.
m <- leaflet() %>% setView(lng = -8.61419, lat = 41.16311, zoom = 13)
m <- addTiles(m)
m <- addProviderTiles(m, "CartoDB.Positron")

# `counter` indexes list_osm_edge and the rows of df_osm_edge together, so the
# colour always comes from the row of the way being drawn - also after a way
# for which the database returned no geometry.
for (counter in seq_along(list_osm_edge)) {
  way_id <- list_osm_edge[counter]

  # NOTE(review): the query is assembled by string concatenation. way_id comes
  # from our own table, but a parameterised query would be safer if the driver
  # supports it.
  df_way_id <- dbGetQuery(con_osm, paste0("SELECT st_astext(st_transform(way, 4326)) AS line FROM planet_osm_line WHERE planet_osm_line.osm_id = ", way_id))

  line <- as.character(df_way_id$line)

  # Nothing to draw when the way is not in the database or has no geometry.
  if (length(line) == 0 || is.na(line[1])) {
    next
  }

  # Keep the text between the first "(" and the final character, i.e. the
  # comma-separated "lon lat" pairs (presumably a WKT LINESTRING).
  line <- unlist(strsplit(line, split = "(", fixed = TRUE))[2]
  line <- substr(line, 1, nchar(line) - 1)

  # Split into pairs, then each pair into its two coordinates.
  coords <- strsplit(strsplit(line, ",")[[1]], split = " ", fixed = TRUE)
  lons <- as.numeric(vapply(coords, function(xy) xy[1], character(1)))
  lats <- as.numeric(vapply(coords, function(xy) xy[2], character(1)))

  # Deciding the color of the line.
  n_points <- df_osm_edge[counter, 2]
  line_colour <- if (n_points > feup) {
    "blue"
  } else if (n_points >= superior) {
    "red"
  } else if (n_points >= medio) {
    "yellow"
  } else if (n_points >= low) {
    "green"
  } else {
    NULL # below `low`: the way is not drawn
  }

  if (!is.null(line_colour)) {
    m <- addPolylines(
      m, lons, lats,
      color = line_colour,
      popup = as.character(way_id)
    )
  }

  #content <- paste("w", way_id, sep = "")
  #m <- addPopups(m, lons, lats, content, options = popupOptions(closeButton = TRUE))
  #m <- addLabelOnlyMarkers(m, lons,lats, label = paste("", way_id, sep = ""), 
  #                         labelOptions = labelOptions(noHide = F, textsize = "15px"))
  #print(line)
}

```
### Showing maps 

Traffic  Map April Porto 2016 all
```{r eval= TRUE}
# Showing Map.
# Stores the map currently held in `m` under its own name and prints it.
# NOTE(review): `m` is built by the map chunk, which is eval=FALSE - it has to
# be rebuilt by hand with the inputs for this view before running this chunk.
m1 <- m
m1
#mapshot(m, url = paste0(getwd(), "/map.html"))
```

Traffic  Map April 2016 all >50
```{r echo=FALSE}
# Stores the current `m` as m_50 and prints it.
# NOTE(review): assumes `m` was rebuilt by hand for this view - confirm.
m_50 <- m
m_50 
```

Traffic day Map April 2016
```{r echo=FALSE}
# Stores the current `m` as m_50_day and prints it.
# NOTE(review): assumes `m` was rebuilt by hand from the day table - confirm.
m_50_day <- m
m_50_day
```

Traffic night Map April 2016
```{r echo=FALSE}
# Stores the current `m` as m_20_night and prints it.
# NOTE(review): assumes `m` was rebuilt by hand from the night table - confirm.
m_20_night <- m
m_20_night
```

Top 12 edges 

```{r echo=FALSE}
# Stores the current `m` as m_hot and prints it.
# NOTE(review): assumes `m` was rebuilt by hand for the top edges - confirm.
m_hot <- m
m_hot
```


Map Intersession time all 1.30h.
```{r echo=FALSE}
# Stores the current `m` as m_itm and prints it.
# NOTE(review): assumes `m` was rebuilt by hand for intersession time - confirm.
m_itm <- m
m_itm
```

Map Intersession time (<1.3h) day.
```{r echo=FALSE}
# Stores the current `m` as m_itm_day and prints it.
# NOTE(review): assumes `m` was rebuilt by hand for day intersession time - confirm.
m_itm_day <- m
m_itm_day
#mapshot(m_night_semsessions, file = "~/maps/top_intersessions_night_april.png")
```
Map Intersession time (<2h)night.
```{r echo=FALSE}
# Stores the current `m` as m_itm_night and prints it.
# NOTE(review): assumes `m` was rebuilt by hand for night intersession time - confirm.
m_itm_night <- m
m_itm_night
#mapshot(m, file = "~/maps/_all_april.png")
```




## By date >50

```{r echo= FALSE} 
# Number of way_id/session rows on the hot ways per weekday. wday() gives
# numbers, which the discrete x scale shows under the listed day names.
ggplot(
  summarise(
    group_by(
      subset(df_intersession_april16pt, way_id %in% df_hotedges_april16pt$way_id),
      weekday = wday(time)
    ),
    way_ids = n()
  ),
  aes(weekday, way_ids)
) +
  geom_line() +
  scale_x_discrete(
    limits = c("Domingo", "Segunda", "Terça", "Quarta", "Quinta", "Sexta", "Sábado")
  )

```

```{r echo= FALSE}
# Number of way_id/session rows on the hot ways per day of the month.
ggplot(
  summarise(
    group_by(
      subset(df_intersession_april16pt, way_id %in% df_hotedges_april16pt$way_id),
      day = day(time)
    ),
    n = n()
  ),
  aes(day, n)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(1, 30, 1))

```

```{r echo= FALSE} 
# Number of way_id/session rows on the hot ways per hour of the day.
ggplot(
  summarise(
    group_by(
      subset(df_intersession_april16pt, way_id %in% df_hotedges_april16pt$way_id),
      hour = hour(time)
    ),
    n = n()
  ),
  aes(hour, n)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 23, 1))

```

## Speeds >50

```{r}
# Mean speed and number of records per hot way_id.
# speed * 18 / 5 rescales by 3.6 (presumably m/s to km/h, going by the axis label).
# NOTE(review): reads df_speed (all records), not the Porto-filtered table.
avg_speed_wayid <- summarise(
  group_by(
    subset(df_speed, way_id %in% df_hotedges_april16pt$way_id),
    way_id
  ),
  avg_speed = mean((speed * 18) / 5),
  n = n()
)

# Histogram of the per-way averages in 5 km/h bins.
ggplot(avg_speed_wayid, aes(avg_speed)) +
  geom_histogram(binwidth = 5) +
  scale_x_continuous(
    name = "Avg_speed(km/h)",
    breaks = seq(0, 150, 10)
  )
```

Speed by week 
```{r echo=FALSE}
# Mean speed (x 3.6) on the hot ways per weekday. wday() gives numbers, which
# the discrete x scale shows under the listed day names.
ggplot(
  summarise(
    group_by(df_hotedges_april16pt, weekday = wday(time)),
    avg_speed = mean((speed * 18) / 5)
  ),
  aes(weekday, avg_speed)
) +
  geom_line() +
  scale_x_discrete(
    limits = c("Domingo", "Segunda", "Terça", "Quarta", "Quinta", "Sexta", "Sábado")
  ) +
  ylab("Avg speed km/h")
```

Speed by day

```{r echo=FALSE}
# Mean speed (x 3.6) on the hot ways per day of the month.
ggplot(
  summarise(
    group_by(df_hotedges_april16pt, day = day(time)),
    avg_speed = mean((speed * 18) / 5)
  ),
  aes(day, avg_speed)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 30, 1)) +
  ylab("Avg speed km/h")
```

Speed by hour
```{r echo=FALSE}
# Mean speed (x 3.6) on the hot ways per hour of the day.
ggplot(
  summarise(
    group_by(df_hotedges_april16pt, hour = hour(time)),
    avg_speed = mean((speed * 18) / 5)
  ),
  aes(hour, avg_speed)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 23, 1)) +
  ylab("Avg speed km/h")
```

Number of sessions per hour
```{r echo=FALSE}
# Distinct sessions seen on the hot ways in each hour of the day.
ggplot(
  summarise(
    group_by(df_hotedges_april16pt, hour = hour(time)),
    sessions = n_distinct(session_id)
  ),
  aes(hour, sessions)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 23, 1))
```

Way_ids per half hour
```{r}
# Count of hot-way records per 30-minute slot, for days 1 to 6 of the month.
prueba <- table(
  cut(
    filter(df_hotedges_april16pt, day(time) < 7)$time,
    breaks = "30 mins"
  )
)
plot(prueba, ylab = "frequency", xlab = "date")
```

## Time series per way_id

```{r echo=FALSE}
# Speed (x 3.6) over time for the third way_id in list_osm_edge, coloured by
# the day/night `class` column.
# NOTE(review): `class` is only added to df_superhotedges_april16pt in an
# eval=FALSE chunk - confirm it exists in the workspace before running.
plot_ly(
  filter(df_superhotedges_april16pt, way_id == list_osm_edge[3]),
  x = ~time,
  y = ~(speed*18)/5,
  color = ~class
) %>%
  layout(
    title = paste("Time series of speed for way_id", list_osm_edge[3]),
    xaxis = list(title = "Date"),
    yaxis = list(title = "Speed (km/h)")
  )

```

```{r}
# Line plot of speed (x 3.6) over time for one hard-coded way (37141967).
# NOTE(review): the group_by() adds a day grouping that no aesthetic uses, so
# it looks like it has no effect on the plot - confirm before removing.
df_hotedges_april16pt %>%
  filter(way_id == 37141967) %>%
  group_by(day(time)) %>%
  ggplot(aes(x = time, y = (speed * 18) / 5)) +
  geom_line()
```




